This notebook computes indices for sounds (or soundscape recordings) in a pumilio database.
working_directory – temporary directory for processing recordings
In [7]:
# This cell is saved above the cell that imports os.path, so it imports the
# module itself; otherwise a fresh "Restart & Run All" stops here with NameError.
import os.path

# Temporary directory for processing recordings. The FLAC -> WAV conversion
# step and the main loop delete this whole tree, so nothing else should be
# stored in it.
working_directory = os.path.join(os.environ['HOME'], "pymilio_temp/")
In [1]:
%load_ext rpy2.ipython
In [2]:
import rpy2.robjects as ro
In [3]:
%%R
library('tuneR')
library('seewave')
library('soundecology')
In [4]:
import pandas
import pandas.rpy.common as com
# new version not working...
#from rpy2.robjects import pandas2ri
#pandas2ri.activate()
#from rpy2.robjects import r
In [5]:
import os.path
from shutil import rmtree
import subprocess
In [6]:
import pyprind
In [9]:
from Pymilio import database
In [10]:
# Shared database handle used by every later cell (row inserts/updates and lookups).
# NOTE(review): read_default_file presumably points at a MySQL option file that
# holds the password -- confirm against the Pymilio documentation.
pumilio_db = database.Pymilio_db_connection(
    user='pumilio',
    database='pumilio',
    read_default_file='~/.my.cnf.pumilio'
)
get all sounds
In [11]:
# NOTE(review): `source_directory` is not defined in any cell visible in this
# notebook; it has to be set before this cell runs (presumably to the root
# directory that holds the recordings -- confirm).
# Later cells iterate over `sounds` and look entries up by key, using each
# value as the path of the recording to convert.
sounds = pumilio_db.get_sound_paths(prepath=source_directory)
database functions
In [12]:
def insert_row(table, columns, values):
    """Insert into `table` through the shared `pumilio_db` connection.

    `columns` and `values` are handed to `pumilio_db.insert` together, as a
    single `(columns, values)` pair.
    """
    column_value_pair = (columns, values)
    pumilio_db.insert(table, values=column_value_pair)
In [13]:
def update_row(table, values, where):
    """Forward an update of `table` to the shared `pumilio_db` connection.

    `values` and `where` are passed through unchanged; callers in this
    notebook supply `values` as a `(column, value)` pair and `where` as a
    condition string.
    """
    update_arguments = {'table': table, 'values': values, 'where': where}
    pumilio_db.update(**update_arguments)
index calculation functions
In [14]:
def calculateACI(sound):
    """Compute the acoustic complexity index for one wav file.

    sound -- path of the wav file; it is read into the R variable `sound`
             only when no object of that name exists in the R session yet.

    Returns a tuple `(results, language, command)`: a dict with the four
    'AciTotAll_*' values, the language used for the computation ('R') and
    the exact R expression that was evaluated.
    """
    language = 'R'
    command = "acoustic_complexity(sound, min_freq = NA, max_freq = NA, j = 5, fft_w = 512)"

    # load the recording unless the R session already holds a `sound` object
    load_statement = """capture.output(if (!(exists("sound"))) {{ sound <- readWave("{0}") }}, file=NULL)""".format(sound)
    ro.r(load_statement)

    # evaluate the index; capture.output(..., file=NULL) collects what R would print
    ro.r("""capture.output(ACI <- {0}, file=NULL)""".format(command))

    # pull the R object `ACI` over to Python and keep the first entry of each field
    aci = com.load_data('ACI')
    fields = ('AciTotAll_left', 'AciTotAll_right',
              'AciTotAll_left_bymin', 'AciTotAll_right_bymin')
    results = dict((field, aci[field][0]) for field in fields)
    return results, language, command
In [15]:
def calculateADI(sound):
    """Compute the acoustic diversity index for one wav file.

    sound -- path of the wav file; it is read into the R variable `sound`
             only when no object of that name exists in the R session yet.

    Returns a tuple `(results, language, command)`. `results` holds the left
    and right index values plus the per-band values and band ranges of both
    channels rendered as strings; `language` is 'R'; `command` is the R
    expression that was evaluated.
    """
    def quoted_list(labels):
        # render labels as a list literal of single-quoted strings
        return "['" + "', '".join(labels) + "']"

    language = 'R'
    command = "acoustic_diversity(sound, max_freq = 12000, db_threshold = -50, freq_step = 1000, shannon = TRUE)"

    # load the recording unless the R session already holds a `sound` object
    load_statement = """capture.output(if (!(exists("sound"))) {{ sound <- readWave("{0}") }}, file=NULL)""".format(sound)
    ro.r(load_statement)

    # evaluate the index; capture.output(..., file=NULL) collects what R would print
    ro.r("""capture.output(ADI <- {0}, file=NULL)""".format(command))

    # pull the R object `ADI` over to Python (the pandas2ri route was reported as not working)
    adi = com.load_data('ADI')

    results = {}
    for channel in ('left', 'right'):
        results['adi_' + channel] = adi['adi_' + channel][0]
        results[channel + '_band_values'] = str(adi[channel + '_band_values'])
        results[channel + '_bandrange_values'] = quoted_list(adi[channel + '_bandrange_values'])
    return results, language, command
In [16]:
def calculateAEI(sound):
    """Compute the acoustic evenness index for one wav file.

    sound -- path of the wav file; it is read into the R variable `sound`
             only when no object of that name exists in the R session yet.

    Returns a tuple `(results, language, command)`: a dict with 'aei_left'
    and 'aei_right', the language used for the computation ('R') and the
    exact R expression that was evaluated.
    """
    # specify command with all arguments and language used for computation
    language = 'R'
    # The closing parenthesis is part of the command itself. It used to be
    # supplied by the wrapper template below, so the string returned to the
    # caller (and stored in the 'Analyses' table) was an incomplete R expression.
    # NOTE(review): 'Analyses' rows saved under the old, truncated string will
    # no longer match this command and a new row will be created.
    command = "acoustic_evenness(sound, max_freq = 12000, db_threshold = -50, freq_step = 1000)"
    # load wav file in R environment
    r_command = """capture.output(if (!(exists("sound"))) {{ sound <- readWave("{0}") }}, file=NULL)""".format(sound)
    ro.r(r_command)
    # run calculation in R environment (same R code as before: the parenthesis
    # only moved from the template into `command`)
    r_command = """capture.output(AEI <- {0}, file=NULL)""".format(command)
    ro.r(r_command)
    # load results into python environment
    AEI = com.load_data('AEI')
    results = {
        'aei_left': AEI['aei_left'][0],
        'aei_right': AEI['aei_right'][0]
    }
    return results, language, command
In [17]:
def calculateBAI(sound):
    """Compute the bioacoustic index for one wav file.

    sound -- path of the wav file; it is read into the R variable `sound`
             only when no object of that name exists in the R session yet.

    Returns a tuple `(results, language, command)`: a dict with 'left_area'
    and 'right_area', the language used for the computation ('R') and the
    exact R expression that was evaluated.
    """
    language = 'R'
    command = "bioacoustic_index(sound, min_freq = 2000, max_freq = 8000, fft_w = 512)"

    # load the recording unless the R session already holds a `sound` object
    load_statement = """capture.output(if (!(exists("sound"))) {{ sound <- readWave("{0}") }}, file=NULL)""".format(sound)
    ro.r(load_statement)

    # evaluate the index; capture.output(..., file=NULL) collects what R would print
    ro.r("""capture.output(BAI <- {0}, file=NULL)""".format(command))

    # pull the R object `BAI` over to Python and keep the first entry of each field
    bai = com.load_data('BAI')
    results = {}
    for field in ('left_area', 'right_area'):
        results[field] = bai[field][0]
    return results, language, command
In [18]:
def calculateNDSI(sound):
    """Compute the NDSI (normalized difference soundscape index) for one wav file.

    sound -- path of the wav file; it is read into the R variable `sound`
             only when no object of that name exists in the R session yet.

    Returns a tuple `(results, language, command)`: a dict with the ndsi,
    biophony and anthrophony values of the left and right channel, the
    language used for the computation ('R') and the exact R expression that
    was evaluated.
    """
    language = 'R'
    command = "ndsi(sound, fft_w = 1024, anthro_min = 1000, anthro_max = 2000, bio_min = 2000, bio_max = 12000)"

    # load the recording unless the R session already holds a `sound` object
    load_statement = """capture.output(if (!(exists("sound"))) {{ sound <- readWave("{0}") }}, file=NULL)""".format(sound)
    ro.r(load_statement)

    # evaluate the index; capture.output(..., file=NULL) collects what R would print
    ro.r("""capture.output(NDSI <- {0}, file=NULL)""".format(command))

    # pull the R object `NDSI` over to Python and keep the first entry of each field
    ndsi = com.load_data('NDSI')
    fields = [
        'ndsi_left', 'ndsi_right',
        'biophony_left', 'anthrophony_left',
        'biophony_right', 'anthrophony_right',
    ]
    results = dict((field, ndsi[field][0]) for field in fields)
    return results, language, command
In [19]:
def calculateSSS(sound):
    """Compute the soundscape spectrum (soundscapespec) for one wav file.

    sound -- path of the wav file; it is read into the R variable `sound`
             only when no object of that name exists in the R session yet.

    Returns a tuple `(results, language, command)`. `results` has the single
    key 'frequency_power': the string form of a list holding every
    'amplitude' value formatted with four decimals. `language` is 'R' and
    `command` is the R expression that was evaluated.
    """
    language = 'R'
    command = """soundscapespec(sound, wl = 1024, wn = "hamming", ovlp = 50, plot = FALSE)"""

    # load the recording unless the R session already holds a `sound` object
    load_statement = """capture.output(if (!(exists("sound"))) {{ sound <- readWave("{0}") }}, file=NULL)""".format(sound)
    ro.r(load_statement)

    # evaluate the spectrum; capture.output(..., file=NULL) collects what R would print
    ro.r("""capture.output(SSS <- {0}, file=NULL)""".format(command))

    # pull the R object `SSS` over to Python
    sss = com.load_data('SSS')

    formatted_amplitudes = []
    for amplitude in sss['amplitude'].as_matrix():
        formatted_amplitudes.append('{0:.4f}'.format(amplitude))

    results = {'frequency_power': str(formatted_amplitudes)}
    return results, language, command
testing...
In [20]:
def log_process(string):
    """Append `string` as one line to $HOME/Desktop/process_log/process_log.txt.

    The log directory (including a missing `Desktop` parent) and the log
    file are created on first use.

    Raises KeyError when the HOME environment variable is not set.
    """
    log_directory = os.path.join(os.environ['HOME'], "Desktop/process_log/")
    # makedirs also creates missing parents; os.mkdir raised when ~/Desktop did not exist
    if not os.path.isdir(log_directory):
        os.makedirs(log_directory)
    log_path = os.path.join(log_directory, "process_log.txt")
    # mode 'a' creates the file when it is absent, so no separate 'w' branch is
    # needed; the with-block closes the handle even if the write raises
    with open(log_path, 'a') as log_file:
        log_file.write(string + '\n')
In [21]:
#results = calculateBAI("/Users/Jake/Desktop/test/160224-160000.wav")
In [22]:
#sounds = {'129': '/Users/Jake/Desktop/test/160224-160000.wav'}
conversion functions
In [23]:
def convertFLACtoWAV(flacfile, wavfile):
    """Decode `flacfile` into `wavfile` with the `flac` command line tool.

    The working directory is reset first: an existing `working_directory`
    tree is deleted and an empty directory is created in its place, so any
    previously converted file stored there is lost.

    Raises subprocess.CalledProcessError when `flac` exits with a non-zero
    status.
    """
    directory_present = os.path.exists(working_directory)
    if directory_present:
        rmtree(working_directory)
    os.mkdir(working_directory)

    decode_command = ["flac", "-d", flacfile, "-o", wavfile]
    subprocess.check_output(decode_command)
check and calculate missing indices for all sounds in the database
In [24]:
# list of supported indices
#indices = ['ACI', 'ADI', 'AEI', 'BAI', 'NDSI', 'SSS']
indices = ['SSS']

# The bar is advanced once per (sound, index) pair, so it is sized to the
# number of pairs rather than to the number of sounds.
progress_bar = pyprind.ProgBar(len(sounds) * len(indices), bar_char='█', title='Process progress', monitor=True, stream=1, width=50)

for sound in sounds:
    # compute all indices for the current sound
    for index in indices:
        # update progress bar
        progress_bar.update(item_id = sound+" -> "+index)

        tablename = 'Index' + index
        sound_filter = "Sound = '{0}'".format(sound)
        row = pumilio_db.fetch_as_pandas_df(table=tablename, where=sound_filter)

        # Only calculate when no row exists yet for this sound and index.
        # One existing row means the index was already calculated; more than
        # one is unexpected, and (as before) nothing is done in that case.
        if len(row) != 0:
            continue

        insert_row(table=tablename, columns='Sound', values=sound)

        # create a wav file for analysis if one does not already exist in the working directory
        filename = os.path.basename(sounds[sound])
        # Remove only a trailing ".flac". str.strip('.flac') treats its argument
        # as a set of characters and trims them from BOTH ends, which mangles
        # names such as "alarm_call.flac".
        if filename.endswith('.flac'):
            filename = filename[:-len('.flac')]
        wavfile = os.path.join(working_directory, filename) + '.wav'
        if not os.path.exists(wavfile):
            convertFLACtoWAV(sounds[sound], wavfile)

        # compute index: look up calculate<INDEX> by name instead of eval()-ing a code string
        calculate = globals()["calculate{0}".format(index)]
        results, language, command = calculate(wavfile)

        # update each column in database for each part of the index returned
        for item in results:
            update_row(table=tablename, values=(item, results[item]), where=sound_filter)

        # check if command already exists in the 'Analyses' table
        command_filter = "command = '{0}'".format(command)
        command_row = pumilio_db.fetch_as_pandas_df(table='Analyses', where=command_filter)
        if len(command_row) == 0:
            # add the command to the 'Analyses' table if it does not exist
            insert_row(table='Analyses', columns='command', values=command)
            # get id of new row
            command_row = pumilio_db.fetch_as_pandas_df(table='Analyses', where=command_filter)
            command_ID = command_row['ID'][0]
            # update all other columns
            id_filter = "ID = '{0}'".format(command_ID)
            update_row(table='Analyses', values=('name', index), where=id_filter)
            update_row(table='Analyses', values=('language', language), where=id_filter)
        elif len(command_row) == 1:
            # if the command exists, get the command_id
            command_ID = command_row['ID'][0]
        else:
            # Duplicate commands: fail loudly. Falling through would either raise
            # NameError or silently store the command_ID left over from an earlier
            # iteration.
            raise RuntimeError(
                "Expected at most one 'Analyses' row for command {0!r}, found {1}".format(
                    command, len(command_row)))

        # update the command_id for the index calculation
        update_row(table=tablename, values=('command_ID', command_ID), where=sound_filter)

    # cleanup R environment so the next recording is read instead of reusing `sound`
    ro.r("remove('sound')")
    # cleanup working directory
    if os.path.exists(working_directory):
        rmtree(working_directory)

print('\n')
progress_bar.update()
print(progress_bar)